Body Size from Unconventional Specimens: A 3D Geometric Morphometrics Approach to Fishes from Ancestral Pueblo Contexts

code
morphometry
allometry
biometrics
3D GMM
Author

Jonathan Dombrosky

Published

February 12, 2023

The following is the exact supplemental Rmarkdown file from this Journal of Archaeological Science article. It contains a number of additional analyses, such as a consideration of intra- and interanalyst error.

INTRODUCTION

This Rmarkdown file is organized into six major sections: Introduction, Packages Needed, Validating Centroid Size-Based Body Size Reconstruction, Archaeological Body Size Estimations, Modern Comparisons, and Intra- and Interindividual Error Testing. This file corresponds to analyses and figures produced in the manuscript Body Size from Unconventional Specimens. However, it also provides supplemental figures and analyses not presented in the body of the manuscript. All data are directly imported from their most raw formats (housed in corresponding folders in this Supplemental file) so that data manipulation is explicit and analyses are reproducible. The section INTRA- AND INTERINDIVIDUAL ERROR TESTING and the subsection Error Associated with SL to TL Length-Length Conversion are referenced in the manuscript, but the statistical analyses and interpretation are presented here.

PACKAGES NEEDED

library(geomorph)
library(dplyr)
library(tidyr)
library(ggplot2)
library(effectsize)
library(ggrepel)
library(ggExtra)

VALIDATING CENTROID SIZE-BASED BODY SIZE RECONSTRUCTION

# 3D landmark coordinates for the modern comparative vertebrae
mydata <- read.table(
  "Validating Centroid Size/Vertebra_Analysis_Centroid.txt", header=TRUE, 
  row.names=1, stringsAsFactors = FALSE)

# body size measurements (SL and TL) for the modern comparative specimens
body.size <- read.table("Basic Files/Body_Size.txt", header=TRUE)

# centrum width measurements for the same vertebrae
width <- read.table("Validating Centroid Size/Vertebra_Analysis_Width.txt",
                    header=TRUE)

species <- read.table("Basic Files/Species.txt", header=TRUE)

# convert the flat coordinate table to a p x 3 x n landmark array
a <- arrayspecs(mydata, ncol(mydata)/3, 3)

# Generalized Procrustes Analysis
mydata.gpa <- gpagen(a, curves = NULL, surfaces = NULL, PrinAxes = TRUE, 
                     max.iter = NULL, ProcD = TRUE, Proj = TRUE, 
                     print.progress = FALSE)

centroid.df <- data.frame(mydata.gpa$Csize)
centroid.df <- tibble::rownames_to_column(centroid.df, "ID")

centroid.clean <- centroid.df %>% 
  separate("ID", into = c("ID", "Vert_Num")) %>%
  merge(body.size, by="ID") %>%
  dplyr::rename(Csize = mydata.gpa.Csize)

centroid.clean.width <- width %>% 
  separate("ID", into = c("ID", "Vert_Num")) %>%
  merge(centroid.clean, by= c("ID", "Vert_Num"))

# regression of SL on centrum width
lm1 <- lm(data = centroid.clean.width, SL ~ Width)

# regression of SL on centroid size
lm2 <- lm(data = centroid.clean, SL ~ Csize)

# predicted SL from each estimator and percent prediction error (PE)
full.dataset <- centroid.clean.width %>%
  mutate(Size.Centroid = (lm2$coefficients[[2]]*Csize)+lm2$coefficients[[1]],
         Size.Width = (lm1$coefficients[[2]]*Width)+lm1$coefficients[[1]],
         PE.Centroid = ((SL - Size.Centroid)*100)/Size.Centroid,
         PE.Width = ((SL - Size.Width)*100)/Size.Width)

# mean prediction error (MPE) per specimen
MPE <- full.dataset %>%
  group_by(ID) %>%
  dplyr::summarize(MPE.Centroid = mean(PE.Centroid),
                   MPE.Width = mean(PE.Width))

Standard Length and Centrum Width

p <- ggplot(data = full.dataset, mapping = aes(x = Width, y = SL))

p + geom_point(aes(color = ID), alpha = 0.5, size = 3) + 
  geom_smooth(formula = y ~ x, method = "lm", size = 1.25, 
              color = "#4d4d4d") +
  theme_classic() +
  ylim(166, 500) +
  annotate("text", x = Inf, y = 166, 
           label =
             "paste(italic(R^2), \" = .91 \")",
           parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
  labs(x = "Centrum Width (mm)", y = "Standard Length (mm)") +
  theme(legend.position = "none",
        axis.line = element_line(color = "#4d4d4d", size = 1),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1))

summary(lm(data = full.dataset, SL ~ Width))

Call:
lm(formula = SL ~ Width, data = full.dataset)

Residuals:
    Min      1Q  Median      3Q     Max 
-51.589 -11.046   0.586  10.286  59.150 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  40.4340     5.3646   7.537 1.36e-12 ***
Width        43.8625     0.9596  45.711  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 17.73 on 213 degrees of freedom
Multiple R-squared:  0.9075,    Adjusted R-squared:  0.9071 
F-statistic:  2090 on 1 and 213 DF,  p-value: < 2.2e-16

Standard Length and Centroid Size

p <- ggplot(data = full.dataset, mapping = aes(x = Csize, y = SL))
p + geom_point(aes(color = ID), alpha = 0.5, size = 3) + 
  geom_smooth(formula = y ~ x, method = "lm", size = 1.25, 
              color = "#4d4d4d") +
  theme_classic() +
  ylim(166, 500) +
  annotate("text", x = Inf, y = 166, 
           label = 
             "paste(italic(R^2), \" = .86 \")",
           parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
  labs(x = "Centroid Size", y = "Standard Length (mm)") +
  theme(legend.position = "none",
        axis.line = element_line(color = "#4d4d4d", size = 1),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1))

summary(lm(data = full.dataset, SL ~ Csize))

Call:
lm(formula = SL ~ Csize, data = full.dataset)

Residuals:
    Min      1Q  Median      3Q     Max 
-39.810 -14.574  -3.581  12.309 100.548 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  44.3412     6.5309   6.789  1.1e-10 ***
Csize        21.3524     0.5783  36.920  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 21.43 on 213 degrees of freedom
Multiple R-squared:  0.8649,    Adjusted R-squared:  0.8642 
F-statistic:  1363 on 1 and 213 DF,  p-value: < 2.2e-16

Centrum Width Estimation and Centroid Size Estimation

p <- ggplot(data = full.dataset, mapping = aes(x = Size.Centroid, 
                                                y = Size.Width))
p + geom_point(aes(color = ID), alpha = 0.5, size = 3) + 
  geom_smooth(formula = y ~ x, method = "lm", size = 1.25, 
              color = "#4d4d4d") +
  theme_classic() +
  annotate("text", x = Inf, y = 160, 
           label = "paste(italic(R^2), \" = .94 \")",
           parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
  labs(x = "Centroid Size Estimated Standard Length (mm)", 
       y = "Centrum Width Estimated Standard Length (mm)") +
  theme(legend.position = "none",
        axis.line = element_line(color = "#4d4d4d", size = 1),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 13.5, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 13.5, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1))

summary(lm(data = full.dataset, Size.Width ~ Size.Centroid))

Call:
lm(formula = Size.Width ~ Size.Centroid, data = full.dataset)

Residuals:
    Min      1Q  Median      3Q     Max 
-37.659  -7.359  -0.599   6.340  44.890 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)    2.42343    5.03004   0.482     0.63    
Size.Centroid  0.99132    0.01768  56.072   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 13.99 on 213 degrees of freedom
Multiple R-squared:  0.9366,    Adjusted R-squared:  0.9363 
F-statistic:  3144 on 1 and 213 DF,  p-value: < 2.2e-16

Prediction Error (PE)

p <- ggplot(data = full.dataset, 
            mapping = aes(x = PE.Centroid, y = PE.Width))
p + geom_point(aes(color = ID), alpha = 0.5, size = 3) + 
  geom_smooth(formula = y ~ x, method = "lm", size = 1.25, 
              color = "#4d4d4d") +
  theme_classic() +
  annotate("text", x = Inf, y = -15, 
           label = "paste(italic(R^2), \" = .56 \")",
           parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
  labs(x = "PE Centroid Size", y = "PE Centrum Width") +
  theme(legend.position="none",
        axis.line = element_line(color = "#4d4d4d", size = 1),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1))

summary(lm(data = full.dataset, PE.Width ~ PE.Centroid))

Call:
lm(formula = PE.Width ~ PE.Centroid, data = full.dataset)

Residuals:
    Min      1Q  Median      3Q     Max 
-8.8205 -2.8283 -0.0026  2.1242 12.3219 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -0.07220    0.28069  -0.257    0.797    
PE.Centroid  0.60946    0.03733  16.326   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 4.116 on 213 degrees of freedom
Multiple R-squared:  0.5558,    Adjusted R-squared:  0.5537 
F-statistic: 266.5 on 1 and 213 DF,  p-value: < 2.2e-16

Predicted SL w/ Centroid Size ~ Actual SL

Note that centroid size underpredicts values.

full.dataset %>%
  ggplot(mapping = aes(x = SL, y = Size.Centroid)) +
  geom_point(aes(color = ID), alpha = 0.5, size = 3) + 
  geom_smooth(formula = y ~ x, method = "lm", size = 1.25, 
              color = "#4d4d4d") +
  theme_classic() +
  labs(x = "Actual SL", y = "Predicted SL w/ Centroid Size") +
  annotate("text", x = Inf, y = 175, 
           label = "paste(italic(R^2), \" = .86 \")",
           parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
  theme(legend.position="none",
        axis.line = element_line(color = "#4d4d4d", size = 1),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1))

summary(lm(data = full.dataset, Size.Centroid ~ SL))

Call:
lm(formula = Size.Centroid ~ SL, data = full.dataset)

Residuals:
   Min     1Q Median     3Q    Max 
-76.13 -10.50   1.93  11.25  45.98 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 37.75302    6.68342   5.649 5.14e-08 ***
SL           0.86485    0.02343  36.920  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 19.93 on 213 degrees of freedom
Multiple R-squared:  0.8649,    Adjusted R-squared:  0.8642 
F-statistic:  1363 on 1 and 213 DF,  p-value: < 2.2e-16

Predicted SL w/ Centrum Width ~ Actual SL

full.dataset %>%
  ggplot(mapping = aes(x = SL, y = Size.Width)) +
  geom_point(aes(color = ID), alpha = 0.5, size = 3) + 
  geom_smooth(formula = y ~ x, method = "lm", size = 1.25, 
              color = "#4d4d4d") +
  theme_classic() +
  labs(x = "Actual SL", y = "Predicted SL w/ Centrum Width") +
  annotate("text", x = Inf, y = 175, 
           label = "paste(italic(R^2), \" = .91 \")",
           parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
  theme(legend.position="none",
        axis.line = element_line(color = "#4d4d4d", size = 1),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1))

summary(lm(data = full.dataset, Size.Width ~ SL))

Call:
lm(formula = Size.Width ~ SL, data = full.dataset)

Residuals:
    Min      1Q  Median      3Q     Max 
-52.495 -10.535  -1.994  10.281  55.812 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 25.84186    5.66415   4.562 8.54e-06 ***
SL           0.90749    0.01985  45.711  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 16.89 on 213 degrees of freedom
Multiple R-squared:  0.9075,    Adjusted R-squared:  0.9071 
F-statistic:  2090 on 1 and 213 DF,  p-value: < 2.2e-16

full.dataset %>%
  select(PE.Centroid, PE.Width) %>%
  gather(key = "PE", value = "value") %>%
  ggplot(aes(x = value, color = PE, fill = PE)) +
  geom_density(size = 1.5, alpha = 0.5) +
  annotate(geom = "text", label = "Centroid Size", x = -5.5, y = .05, hjust = 1, 
           vjust = 0.5, color = "#f8766d", fontface = "bold") +
  annotate(geom = "text", label = "Width", x = 4, y = .06, hjust = 0, 
           vjust = 0.5, color = "#00bfc4", fontface = "bold") +
  theme_classic() +
  theme(legend.position = "none",
        axis.line = element_line(color = "#4d4d4d", size = 1),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
  geom_vline(xintercept = 0, linetype = 2, color = "#4d4d4d", size = 1) +
  labs(x = "Prediction Error", y = "Density") +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0))

t.test(full.dataset$PE.Centroid, full.dataset$PE.Width, var.equal = TRUE)

    Two Sample t-test

data:  full.dataset$PE.Centroid and full.dataset$PE.Width
t = 0.12797, df = 428, p-value = 0.8982
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.219891  1.389806
sample estimates:
  mean of x   mean of y 
 0.03266882 -0.05228858 

p <- ggplot(data = full.dataset, 
            mapping = aes(x = PE.Centroid, y = PE.Width)) +
  geom_point(aes(color = ID), alpha = 0.5, size = 3) + 
  geom_smooth(formula = y ~ x, method = "lm", size = 1.25, 
              color = "#4d4d4d") +
  theme_classic() +
  annotate("text", x = Inf, y = -15, 
           label = "paste(italic(R^2), \" = .56 \")",
           parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
  labs(x = "PE Centroid Size", y = "PE Centrum Width") +
  theme(legend.position="none",
        axis.line = element_line(color = "#4d4d4d", size = 1),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1))

ggMarginal(p, xparams = list(color = "#f8766d", fill = "#f8766d", alpha = 0.5, size = 1.25),
           yparams = list(color = "#00bfc4", fill = "#00bfc4", alpha = 0.5, size = 1.25))

Mean Prediction Error (MPE)

p <- ggplot(data = MPE, mapping = aes(x = MPE.Centroid, y = MPE.Width))
p + geom_point(aes(color = ID), alpha = 0.5, size = 3) + 
  geom_smooth(formula = y ~ x, method = "lm", size = 1.25, 
              color = "#4d4d4d") +
  theme_classic() +
  annotate("text", x = Inf, y = -15, 
           label = "paste(italic(R^2), \" = .63 \")",
           parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
  labs(x = "MPE Centroid Size", y = "MPE Centrum Width") +
  theme(legend.position="none",
        axis.line = element_line(color = "#4d4d4d", size = 1),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1))

summary(lm(data = MPE, MPE.Width ~ MPE.Centroid))

Call:
lm(formula = MPE.Width ~ MPE.Centroid, data = MPE)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.7726 -2.4536 -0.7976  2.1232  6.0297 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -0.05754    0.82072  -0.070 0.945031    
MPE.Centroid  0.91819    0.18205   5.044 0.000146 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.38 on 15 degrees of freedom
Multiple R-squared:  0.6291,    Adjusted R-squared:  0.6043 
F-statistic: 25.44 on 1 and 15 DF,  p-value: 0.0001456

ARCHAEOLOGICAL BODY SIZE ESTIMATIONS

# each .txt file in this folder holds the landmark coordinates for an
# archaeological specimen together with its modern comparative specimens
setwd("Archaeological Estimates")
files    <- list.files(pattern = "\\.txt$")
results  <- data.frame()

for (i in seq_along(files)) {
  fname <- paste(files[i], sep="/")
  
  data <- read.table(fname, header = T, row.names = 1, 
                     stringsAsFactors = FALSE)
  
  # convert the flat coordinate table to a p x 3 x n landmark array
  a <- arrayspecs(data, ncol(data)/3, 3)
  
  # Generalized Procrustes Analysis of all configurations in the file
  mydata.gpa <- gpagen(a, curves = NULL, surfaces = NULL, PrinAxes = TRUE, 
                       max.iter = NULL, 
       ProcD = TRUE, Proj = TRUE, print.progress = FALSE)
  
  centroid.df <- data.frame(mydata.gpa$Csize)
  centroid.df <- tibble::rownames_to_column(centroid.df, "ID")
  
  # merging with body.size drops specimens without an SL record (i.e., the
  # archaeological specimen), so the regression below is fit on the modern
  # specimens only
  centroid.clean <- centroid.df %>% 
    separate("ID", into = "ID") %>%
    merge(body.size, by="ID") %>%
    dplyr::rename(Csize = mydata.gpa.Csize)
  
  fit1 <- summary(lm(data = centroid.clean, SL ~ Csize))
  
  fit2 <- cor.test(centroid.clean$SL, centroid.clean$Csize,  
                   method = "spearman")
  
  # predict SL for the first configuration in the file (the archaeological
  # specimen) from the regression fit to the modern specimens
  Arch_Size <- (fit1$coefficients[[2]]*mydata.gpa$Csize[[1]])+
    fit1$coefficients[[1]]

  results[i,1] <- fit1$coefficients[2]   # slope
  results[i,2] <- mydata.gpa$Csize[[1]]  # archaeological centroid size
  results[i,3] <- fit1$coefficients[1]   # intercept
  results[i,4] <- fit1$r.squared
  results[i,5] <- (fit2$estimate)^2      # Spearman's rho squared
  results[i,6] <- Arch_Size              # estimated archaeological SL
}

rownames(results) <- sub(".txt", "", files)
colnames(results) <- c("Slope", "Csize", "Intercept", "R2", "Rho2",
                       "Arch_SL")

round(results, digits = 2)
                      Slope Csize Intercept   R2 Rho2 Arch_SL
1304_BS               33.31 10.89    -21.54 0.90 0.89  341.36
1554_UV1              18.39 19.17    -38.22 0.81 0.61  314.26
1567_HYO              51.16  8.21     63.92 0.79 0.61  484.12
2005.27.142_HYO_UI30  51.16  5.26     63.92 0.79 0.61  332.85
2005.27.152_CV1_UI32  24.36 11.56     45.47 0.86 0.83  327.14
2005.27.152_CV2_UI32  43.17  6.60     48.47 0.87 0.85  333.31
2005.27.152_HYO_UI33  51.16  8.97     63.92 0.79 0.61  522.60
2005.27.157_UR_UI24   22.55 19.67     44.57 0.92 0.78  488.07
2005.27.158_CV_UI26   21.35 15.74     44.34 0.86 0.83  380.45
2005.27.161_UV_UI25   76.16  3.69     -1.53 0.92 0.88  279.78
2005.27.165_CV_UI29   23.17 16.89     44.46 0.87 0.84  435.78
2005.27.168_OPC_UI42  49.87  7.08     86.66 0.69 0.52  439.77
2005.27.169_CV1_U31   43.17  8.96     48.47 0.87 0.85  435.30
2005.27.169_CV2_U31   43.17  8.90     48.47 0.87 0.85  432.67
2005.27.169_CV3_U31   43.17  6.36     48.47 0.87 0.85  323.04
2005.27.169_CV4_U31   43.17  6.28     48.47 0.87 0.85  319.70
2005.27.173_CV1_U40   43.17  8.00     48.47 0.87 0.85  393.66
2005.27.173_CV2_U40   43.17  7.46     48.47 0.87 0.85  370.38
2005.27.331_CV_UI46   43.17  7.54     48.47 0.87 0.85  373.87
2005.27.331_UV_UI46   59.44  7.65      8.80 0.95 0.95  463.64
2005.27.458_CV_UI20   49.18  8.86     44.85 0.83 0.76  480.79
2007.46.1098_CV_UI3   43.17  9.73     48.47 0.87 0.85  468.56
2007.46.1100_SUB      15.11 32.93    -60.75 0.87 0.91  437.01
2007.46.2161_UI16_1   43.17  9.63     48.47 0.87 0.85  464.34
2007.46.2161_UI16_10  43.17  7.79     48.47 0.87 0.85  384.60
2007.46.2161_UI16_11  43.17  7.39     48.47 0.87 0.85  367.40
2007.46.2161_UI16_12  43.17  5.85     48.47 0.87 0.85  300.82
2007.46.2161_UI16_13  43.17  3.30     48.47 0.87 0.85  191.04
2007.46.2161_UI16_2   43.17  9.65     48.47 0.87 0.85  464.91
2007.46.2161_UI16_3   43.17  9.96     48.47 0.87 0.85  478.27
2007.46.2161_UI16_4   43.17  9.46     48.47 0.87 0.85  456.80
2007.46.2161_UI16_5   43.17  9.20     48.47 0.87 0.85  445.48
2007.46.2161_UI16_6   43.17  9.51     48.47 0.87 0.85  459.00
2007.46.2161_UI16_7   43.17  8.92     48.47 0.87 0.85  433.40
2007.46.2161_UI16_8   43.17  8.48     48.47 0.87 0.85  414.56
2007.46.2161_UI16_9   43.17  8.17     48.47 0.87 0.85  401.04
2007.46.2207_CV_UI15  43.17 10.66     48.47 0.87 0.85  508.51
2007.46.3104_BS_UI62  33.31 12.62    -21.54 0.90 0.89  398.77
2007.46.3442_CV_UI61  43.17 11.28     48.47 0.87 0.85  535.31
2007.46.4008_CT_UI53  24.24 20.57     -9.90 0.87 0.77  488.75
2007.46.4164_SUB_UI78 22.16 22.47      3.66 0.84 0.65  501.69
205.27.167_CV_UI43    52.22  9.23     48.15 0.90 0.89  530.09
205.27.167_HYO_UI43   18.17 12.57     84.31 0.70 0.46  312.58
523_MX                35.90  9.69     84.54 0.83 0.64  432.34
90.20.1199_BS_UI19    33.31 14.46    -21.54 0.90 0.89  460.17
99.20.1111_CT_UI68    26.86  8.10     74.48 0.54 0.44  291.98
99.20.1150_PT_UI58    26.88 11.55     15.55 0.99 0.98  325.84
99.20.160_UV_UI74     59.44  9.14      8.80 0.95 0.95  552.24
99.20.363_UV_UI73     18.39 21.42    -38.22 0.81 0.61  355.58
99.20.9_QUA_UI70      13.02 20.47     58.55 0.76 0.67  325.10
99.22.1765_CV_UI71    21.35 18.09     44.34 0.86 0.83  430.52
99.22.2640_OPC_UI72   49.87  8.04     86.66 0.69 0.52  487.53
99.22.3136_CV_UI8     21.35 18.37     44.34 0.86 0.83  436.54
99.22.882_OPC_UI11    49.87  7.36     86.66 0.69 0.52  453.59
BK.70.71_CT           26.86  8.11     74.48 0.54 0.44  292.29
BK.70.71_CV           43.17  6.43     48.47 0.87 0.85  326.11
BK.70.71_HYO          51.16  5.13     63.92 0.79 0.61  326.53
BK.70.71_MET          23.02 14.09      6.88 0.93 0.76  331.21
BK.70.71_UV           59.44  5.44      8.80 0.95 0.95  331.89
FN51_ENT              38.94  7.15    117.45 0.67 0.38  395.94

Visualize Archaeological Distribution

results %>%
  ggplot(aes(Arch_SL)) +
  geom_density(fill = "#00bfc4", color = "#00bfc4", bw = 50, alpha = 0.5, 
               size = 1.5) +
  theme_classic() +
  theme(legend.position="none",
        axis.line = element_line(color = "#4d4d4d", size = 1),
        plot.title = element_text(color = "#4d4d4d", size = 16, 
                                  face = "bold"),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
        labs(title = "Archaeological Size Distribution", 
             x = "Standard Length (mm)", y = "Density") +
    xlim(0, 700)

MODERN COMPARISONS

Total Length (TL)

A length-length conversion factor from Standard Length (SL) to Total Length (TL) was applied to the archaeological SL estimates. All modern comparison data use TL. TL could not be estimated per archaeological specimen because three specimens from the Museum of Southwestern Biology comparative library (25273, 50002, and 50003) do not have TL measurements. An SL to TL conversion factor of 1.27 was chosen by calculating the mean of the values available for Ictiobus bubalus and Carpiodes carpio on fishbase.de. Available here and here.
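
As a minimal sketch of that calculation (the two ratios below are illustrative placeholders, not the actual values taken from fishbase.de):

# hypothetical species-specific SL-to-TL ratios (placeholder values only)
sl.tl.ratios <- c(Ictiobus_bubalus = 1.26, Carpiodes_carpio = 1.28)

# the conversion factor is the mean of the available ratios
mean(sl.tl.ratios)   # 1.27 with these placeholder values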

# convert archaeological SL to TL
results <- mutate(results, Arch_TL = Arch_SL*1.27)

# visualize archaeological distribution
results %>% 
  ggplot(aes(Arch_TL)) +
  geom_density(fill = "#00bfc4", color = "#00bfc4", bw = 50, alpha = 0.5, 
               size = 1.5) +
  theme_classic() +
  theme(legend.position="none",
        axis.line = element_line(color = "#4d4d4d", size = 1),
        plot.title = element_text(color = "#4d4d4d", size = 16, 
                                  face = "bold"),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
        labs(title = "Archaeological Size Distribution", 
             x = "Total Length (mm)", y = "Density") +
    xlim(100, 900)

Error Associated with SL to TL Length-Length Conversion

TL and TL_estimate for specimens from the comparative library are almost perfectly correlated (R2 = 0.99; rho = 1), meaning that the error associated with the TL conversion factor (1.27) is extremely low. Further, when the conversion factor does err, it underestimates TL, as can be seen by visually inspecting the graph below.
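
Before that graph, a hypothetical numeric illustration of the direction of the error (the 1.30 ratio below is a made-up placeholder, not an observed value):

# a fish whose true SL-to-TL ratio exceeds the 1.27 conversion factor
sl.true <- 300              # standard length (mm)
tl.true <- sl.true * 1.30   # true total length: 390 mm
tl.est  <- sl.true * 1.27   # estimated total length: 381 mm
tl.est < tl.true            # TRUE: the conversion factor underestimates TL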

body.size.estimate <- body.size %>%
  na.omit() %>%
  mutate(TL_estimate = SL * 1.27)

p <- ggplot(data = body.size.estimate, 
            mapping = aes(x = TL_estimate, y = TL))
p + geom_point(alpha = 0.5, size = 4) + 
  geom_smooth(formula = y ~ x, method = "lm", size = 1.25, 
              color = "#4d4d4d") +
  annotate("text", x = 400, y = 215, 
           label = "paste(italic(R) ^ 2, \" = .99, \", italic(rho),
           \" = 1 \")",
           parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
  theme_classic() +
  labs(x = "Estimated Total Length (mm)", y = "Actual Total Length (mm)") +
  theme(legend.position="none",
        axis.line = element_line(color = "#4d4d4d", size = 1),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1))

summary(lm(data = body.size.estimate, TL ~ TL_estimate))

Call:
lm(formula = TL ~ TL_estimate, data = body.size.estimate)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.7035 -2.6240 -0.6684  1.8637 10.3026 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 18.76522    7.36499   2.548   0.0256 *  
TL_estimate  0.97281    0.02235  43.517 1.41e-14 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.968 on 12 degrees of freedom
Multiple R-squared:  0.9937,    Adjusted R-squared:  0.9932 
F-statistic:  1894 on 1 and 12 DF,  p-value: 1.41e-14

rho <-cor.test(body.size.estimate$TL, body.size.estimate$TL_estimate,  
               method = "spearman")
rho

    Spearman's rank correlation rho

data:  body.size.estimate$TL and body.size.estimate$TL_estimate
S = 0, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho 
  1 

Calculating Modern Comparison

Archaeological Estimates

# specify breaks
breaks <- c(-Inf,200,250,300,350,400,450,500,550,Inf)

# specify bin labels
tags <- c("-199", "200-249", "250-299", "300-349", "350-399","400-449", 
          "450-499","500-549", "550+")

# put values into bins
group_tags <- cut(results$Arch_TL, 
                  breaks=breaks,
                  include.lowest=TRUE, 
                  right=FALSE,
                  labels=tags)

# tabulate bin counts and convert to percentages
a <- as_tibble(summary(group_tags), rownames = "bins") %>%
  dplyr::rename(count = value) %>%
  mutate(percent = (count/sum(count)*100)) %>%
  dplyr::select(bins, percent) %>%
  mutate(time = "Archaeological")

Moody (1970)

# overall percentages (1967-1970) reported in Table 4 
b <- tibble(bins = c("-199", "200-249", "250-299", "300-349", "350-399", 
                     "400-449", "450-499","500-549", "550+"), 
          percent = c(0, 2, 3, 11, 15, 21, 36, 11, 1)) %>%
  mutate(time = "Commercial")

NM Game and Fish

NMgamefish <- read.table("Modern Comparison/NM Game and Fish.txt", 
                         header = TRUE)

# specify breaks
breaks <- c(-Inf, 200, 250, 300, 350, 400, 450, 500, 550, Inf)
# specify bin labels
tags <- c("-199", "200-249", "250-299", "300-349", "350-399","400-449", 
          "450-499","500-549", "550+")
# putting values into bins
group_tags <- cut(NMgamefish$TL, 
                  breaks=breaks,
                  include.lowest=TRUE, 
                  right=FALSE,
                  labels=tags)

# tabulate bin counts and convert to percentages
c <- as_tibble(summary(group_tags), rownames = "bins") %>%
  dplyr::rename(count = value) %>%
  mutate(percent = (count/sum(count)*100)) %>%
  dplyr::select(bins, percent) %>%
  mutate(time = "Non_Commercial")

Bind Together and Plot

d <- rbind(a, b, c)

d$time <- factor(d$time, levels = c("Commercial", "Non_Commercial", 
                                    "Archaeological"))

d$time2 <- factor(d$time, 
                  labels = c("Commercial Fishery (1967–1970)", 
                             "Non-Commercial Fishery (2011–2017)",
                             "Archaeological Fishery (ca. AD 1300-1600)"))

ggplot(d, aes(x = bins, y = percent)) +
  geom_bar(stat = "identity", size = 1.5) +
  facet_wrap(~ time2, nrow = 3) +
  scale_y_continuous(breaks = seq(0, 45, by = 15)) +
  theme_classic() +
  theme(legend.position="top",
        strip.text.x = element_text(color = "#4d4d4d", size = 12, 
                                    face = "bold"),
        strip.background = element_rect(color= NA, fill= NA),
        legend.title = element_blank(),
        axis.line = element_line(color = "#4d4d4d", size = 1),
        axis.text.x = element_text(color = "#4d4d4d", size = 7),
        axis.text.y = element_text(color = "#4d4d4d", size = 8),
        axis.title.x = element_text(color = "#4d4d4d", size = 10, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 10, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
        labs(x = "Total Length (mm)", y = "Percent (%)")

INTRA- AND INTERINDIVIDUAL ERROR TESTING

Intraobserver Error

Analyst 1 (Alexandra Harris)

Run a Generalized Procrustes Analysis for all Analyst 1 datafiles. Each .txt file pertains to a specimen and contains five replicate landmark configurations.

setwd("Error Testing/Alex")
files    <- list.files(pattern = "\\.txt$")
my.list <- list()

for (i in seq_along(files)) {
  fname <- paste(files[i], sep="/")
  
  data <- read.table(fname, header = T, row.names = 1, 
                     stringsAsFactors = FALSE)
  
  a <-arrayspecs(data, ncol(data)/3, 3)
  
  mydata.gpa <- gpagen(a, curves = NULL, surfaces = NULL, PrinAxes = TRUE, 
                       max.iter = NULL, 
       ProcD = TRUE, Proj = TRUE, print.progress = FALSE)
  
my.list[[i]] <- mydata.gpa
}

Establish number of rows in each landmark configuration

rows <- rep(NA, 60)
for(i in seq_along(my.list)){
  rows[i] <- dim(my.list[[i]][["coords"]])[1]
}

Create lists out of all coordinates per replicate per specimen and all consensuses per specimen

# initiate
coords <- list()
for(i in 1:60){
  coords[[i]] <- array(NA, dim = c(rows[i], 3, 5))
}

# isolate coordinates per specimen per analyst per replicate
for(i in seq_along(my.list)){
  for(j in 1:5){
  coords[[i]][,,j] <- my.list[[i]][["coords"]][,,j]
  }
}

# initiate
consensus <- list()

# isolate consensus per specimen
for(i in seq_along(my.list)){
  consensus[[i]] <- my.list[[c(i, 4)]]
}

Calculate procd (procd = total Procrustes distance from consensus)

# initiate
output1 <- list()
for(i in 1:60){
  output1[[i]] <- array(NA, dim = c(rows[i], 3, 5))
}

# subtract and square
for(i in seq_along(coords)){
  for(j in 1:5){
    output1[[i]][,,j] <- (coords[[i]][,,j] - consensus[[i]])^2
  }
}

# initiate
output2 <- list()
for(i in 1:60){
  output2[[i]] <- array(NA, dim = c(1, rows[i], 5))
}

# sum rows
for(i in seq_along(output1)){
  for(j in 1:5){
    output2[[i]][,,j] <- rowSums(output1[[i]][,,j])
  }
}

# initiate
procd <- list()
for(i in 1:60){
  procd[[i]] <- array(NA, dim = c(1, 1, 5))
}

# sum and square root
for(i in 1:60){
  for(j in 1:5){
    procd[[i]][,,j] <- sqrt(sum(output2[[i]][,,j]))
  }
}
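
For reference, the nested loops above reduce to a single expression per specimen and replicate; a minimal equivalent sketch (procd.fun is an illustrative helper, not part of the original workflow) is:

# total Procrustes distance of replicate j of specimen i from that
# specimen's consensus configuration, equivalent to the loops above
procd.fun <- function(i, j) sqrt(sum((coords[[i]][,,j] - consensus[[i]])^2))

# e.g., the five replicate distances for the first specimen
sapply(1:5, function(j) procd.fun(1, j))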

Transform procd and assign to Analyst 1

# create dataframe
procd <- data.frame(unlist(procd))

#subset data for Analyst 1 and transform
analyst1.procd <- procd %>%
  mutate(analyst = "Analyst 1",
         replicate = rep(c("1", "2", "3", "4", "5"), times = 60)) %>%
  rename(procd = colnames(procd)[1])

Analyst 2 (Jonathan Dombrosky)

Run a Generalized Procrustes Analysis for all Analyst 2 datafiles. Each .txt file pertains to a specimen and contains five replicate landmark configurations.

setwd("Error Testing/Jon")
files    <- list.files(pattern = "\\.txt$")
my.list <- list()

for (i in seq_along(files)) {
  fname <- paste(files[i], sep="/")
  
  data <- read.table(fname, header = T, row.names = 1, 
                     stringsAsFactors = FALSE)
  
  a <-arrayspecs(data, ncol(data)/3, 3)
  
  mydata.gpa <- gpagen(a, curves = NULL, surfaces = NULL, PrinAxes = TRUE, 
                       max.iter = NULL, 
       ProcD = TRUE, Proj = TRUE, print.progress = FALSE)
  
my.list[[i]] <- mydata.gpa
}

Establish number of rows in each landmark configuration

rows <- rep(NA, 60)
for(i in seq_along(my.list)){
  rows[i] <- dim(my.list[[i]][["coords"]])[1]
}

Create lists out of all coordinates per replicate per specimen and all consensuses per specimen

# initiate
coords <- list()
for(i in 1:60){
  coords[[i]] <- array(NA, dim = c(rows[i], 3, 5))
}

# isolate coordinates per specimen per analyst per replicate
for(i in seq_along(my.list)){
  for(j in 1:5){
  coords[[i]][,,j] <- my.list[[i]][["coords"]][,,j]
  }
}

# initiate
consensus <- list()

# isolate consensus per specimen
for(i in seq_along(my.list)){
  consensus[[i]] <- my.list[[c(i, 4)]]
}

Calculate procd (procd = total Procrustes distance from consensus)

# initiate
output1 <- list()
for(i in 1:60){
  output1[[i]] <- array(NA, dim = c(rows[i], 3, 5))
}

# subtract and square
for(i in seq_along(coords)){
  for(j in 1:5){
    output1[[i]][,,j] <- (coords[[i]][,,j] - consensus[[i]])^2
  }
}

# initiate
output2 <- list()
for(i in 1:60){
  output2[[i]] <- array(NA, dim = c(1, rows[i], 5))
}

# sum rows
for(i in seq_along(output1)){
  for(j in 1:5){
    output2[[i]][,,j] <- rowSums(output1[[i]][,,j])
  }
}

# initiate
procd <- list()
for(i in 1:60){
  procd[[i]] <- array(NA, dim = c(1, 1, 5))
}

# sum and square root
for(i in 1:60){
  for(j in 1:5){
    procd[[i]][,,j] <- sqrt(sum(output2[[i]][,,j]))
  }
}

Transform procd and assign to Analyst 2

# create dataframe
procd <- data.frame(unlist(procd))

# subset data for Analyst 2 and transform
analyst2.procd <- procd %>%
  mutate(analyst = "Analyst 2",
         replicate = rep(c("1", "2", "3", "4", "5"), times = 60)) %>%
  rename(procd = colnames(procd)[1])

Bind Analyst 1 and Analyst 2 datasets

procd <- rbind(analyst1.procd, analyst2.procd)

Visualization and Significance Testing

One-way ANOVA tests indicate that mean procd values do not differ significantly between replicates for Analyst 1 (p = 0.91) or Analyst 2 (p = 0.31). Further, the effect size between replicates is extremely small for Analyst 1 (Eta2 < 0.01) and Analyst 2 (Eta2 = 0.02). The landmark configurations on the entire archaeological dataset are practically indistinguishable between replicates of the same analyst.
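
As a quick arithmetic cross-check, eta squared for a one-way ANOVA is the between-replicate sum of squares divided by the total sum of squares, which can be read directly off the ANOVA tables below:

# eta squared by hand from the Analyst 1 ANOVA table reported below
0.00096 / (0.00096 + 0.27886)   # ~0.003, matching Eta2 = 3.43e-03

# eta squared by hand from the Analyst 2 ANOVA table
0.0188 / (0.0188 + 1.1636)      # ~0.016, rounding to Eta2 = 0.02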

procd %>%
  ggplot(mapping = aes(x = replicate, y = procd, group = replicate, 
                       fill = replicate, color = replicate)) +
  geom_boxplot(size = 0.75, alpha = 0.5, outlier.alpha = 0.5, 
               outlier.size = 2.5) +
  facet_wrap(~ analyst, nrow = 2) +
  theme_classic() +
  theme(legend.position= "none",
        strip.background = element_blank(),
        strip.text.x = element_text(color = "#4d4d4d", size = 16, 
                                    face = "bold"),
        axis.line = element_line(color = "#4d4d4d", size = 1),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
        labs(x = "Replicate", y = "Total Procrustes Distance\nfrom Consensus")

oneway.analyst1 <- aov(procd ~ replicate, data = analyst1.procd)
summary(oneway.analyst1)
             Df  Sum Sq   Mean Sq F value Pr(>F)
replicate     4 0.00096 0.0002397   0.254  0.907
Residuals   295 0.27886 0.0009453               

eta_squared(oneway.analyst1)
For one-way between subjects designs, partial eta squared is equivalent
  to eta squared. Returning eta squared.
# Effect Size for ANOVA

Parameter |     Eta2 |       95% CI
-----------------------------------
replicate | 3.43e-03 | [0.00, 1.00]

- One-sided CIs: upper bound fixed at [1.00].

oneway.analyst2 <- aov(procd ~ replicate, data = analyst2.procd)
summary(oneway.analyst2)
             Df Sum Sq  Mean Sq F value Pr(>F)
replicate     4 0.0188 0.004707   1.193  0.314
Residuals   295 1.1636 0.003944               

eta_squared(oneway.analyst2)
For one-way between subjects designs, partial eta squared is equivalent
  to eta squared. Returning eta squared.
# Effect Size for ANOVA

Parameter | Eta2 |       95% CI
-------------------------------
replicate | 0.02 | [0.00, 1.00]

- One-sided CIs: upper bound fixed at [1.00].

Swarmplot

library(ggbeeswarm)

procd %>%
  ggplot(mapping = aes(x = replicate, y = procd, group = replicate, 
                       fill = replicate, color = replicate)) +
  geom_beeswarm(size = 4, cex = 0.8, alpha = 0.2) +
  geom_beeswarm(size = 4, cex = 0.8, shape = 21, fill = NA) +
  facet_wrap(~ analyst, nrow = 2) +
  theme_classic() +
  theme(legend.position= "none",
        strip.background = element_blank(),
        strip.text.x = element_text(color = "#4d4d4d", size = 16, 
                                    face = "bold"),
        axis.line = element_line(color = "#4d4d4d", size = 1),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
        labs(x = "Replicate", y = "Total Procrustes Distance from Consensus")

Interobserver Error

Run a Generalized Procrustes Analysis for all datafiles (both Analyst 1 and Analyst 2 combined). Each .txt file pertains to a specimen and contains five replicate landmark configurations per analyst.

setwd("Error Testing/Both")
files    <- list.files(pattern = "\\.txt$")
my.list <- list()

for (i in seq_along(files)) {
  fname <- paste(files[i], sep="/")
  
  data <- read.table(fname, header = T, row.names = 1, 
                     stringsAsFactors = FALSE)
  
  a <-arrayspecs(data, ncol(data)/3, 3)
  
  mydata.gpa <- gpagen(a, curves = NULL, surfaces = NULL, PrinAxes = TRUE, 
                       max.iter = NULL, 
       ProcD = TRUE, Proj = TRUE, print.progress = FALSE)
  
my.list[[i]] <- mydata.gpa
}

Establish number of rows in each landmark configuration

rows <- rep(NA, 60)
for(i in seq_along(my.list)){
  rows[i] <- dim(my.list[[i]][["coords"]])[1]
}

Create lists out of all coordinates per replicate per analyst per specimen and all consensuses per specimen

# initiate
coords <- list()
for(i in 1:60){
  coords[[i]] <- array(NA, dim = c(rows[i], 3, 10))
}

# isolate coordinates per specimen per analyst per replicate
for(i in seq_along(my.list)){
  for(j in 1:10){
  coords[[i]][,,j] <- my.list[[i]][["coords"]][,,j]
  }
}

# initiate
consensus <- list()

# isolate consensus per specimen
for(i in seq_along(my.list)){
  consensus[[i]] <- my.list[[c(i, 4)]]
}

Calculate procd (procd = total Procrustes distance from consensus)

# initiate
output1 <- list()
for(i in 1:60){
  output1[[i]] <- array(NA, dim = c(rows[i], 3, 10))
}

# subtract and square
for(i in seq_along(coords)){
  for(j in 1:10){
    output1[[i]][,,j] <- (coords[[i]][,,j] - consensus[[i]])^2
  }
}

# initiate
output2 <- list()
for(i in 1:60){
  output2[[i]] <- array(NA, dim = c(1, rows[i], 10))
}

# sum rows
for(i in seq_along(output1)){
  for(j in 1:10){
    output2[[i]][,,j] <- rowSums(output1[[i]][,,j])
  }
}

# initiate
procd <- list()
for(i in 1:60){
  procd[[i]] <- array(NA, dim = c(1, 1, 10))
}

# sum and square root
for(i in 1:60){
  for(j in 1:10){
    procd[[i]][,,j] <- sqrt(sum(output2[[i]][,,j]))
  }
}

Transform procd

# create dataframe
procd <- data.frame(unlist(procd))

#subset data for Analyst 1 and transform
analyst1.procd <- data.frame(procd[c(rep(TRUE, 5), rep(FALSE, 5)),])

analyst1.procd <- analyst1.procd %>%
  mutate(analyst = "1") %>%
  rename(procd = colnames(analyst1.procd)[1])

#subset data for Analyst 2 and transform
analyst2.procd <- data.frame(procd[c(rep(FALSE, 5), rep(TRUE, 5)),])

analyst2.procd <- analyst2.procd %>%
  mutate(analyst = "2") %>%
  rename(procd = colnames(analyst2.procd)[1])

#bind transformed datasets for Analyst 1 and 2
procd <- rbind(analyst1.procd, analyst2.procd)

Visualization and Significance Testing

A Welch two-sample t-test indicates that mean procd values do not differ significantly between Analyst 1 and Analyst 2 (p = 0.89). Further, the effect size between the two means is extremely small (Cohen's d = -0.01). The landmark configurations on the entire archaeological dataset (replicated five times) are practically indistinguishable between Analyst 1 and Analyst 2.

procd %>%
  ggplot(mapping = aes(x = analyst, y = procd, group = analyst, 
                       fill = analyst, color = analyst)) +
  geom_boxplot(size = 0.75, alpha = 0.5, outlier.alpha = 0.5, 
               outlier.size = 2.5) +
  theme_classic() +
  theme(legend.position= "none",
        strip.background = element_blank(),
        strip.text.x = element_text(color = "#4d4d4d", size = 16, 
                                    face = "bold"),
        axis.line = element_line(color = "#4d4d4d", size = 1),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
        labs(x = "Analyst", y = "Total Procrustes Distance\n from Consensus")

t.test <- t.test(analyst1.procd$procd, analyst2.procd$procd)
t.test

    Welch Two Sample t-test

data:  analyst1.procd$procd and analyst2.procd$procd
t = -0.13727, df = 570.74, p-value = 0.8909
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.01440174  0.01252025
sample estimates:
 mean of x  mean of y 
0.05045055 0.05139130 

cohens_d(t.test)
Cohen's d |        95% CI
-------------------------
-0.01     | [-0.17, 0.15]

- Estimated using un-pooled SD.

Swarmplot

procd %>%
  ggplot(mapping = aes(x = analyst, y = procd, group = analyst, 
                       fill = analyst, color = analyst)) +
  geom_beeswarm(size = 4, cex = 0.8, alpha = 0.2) +
  geom_beeswarm(size = 4, cex = 0.8, shape = 21, fill = NA) +
  theme_classic() +
  theme(legend.position= "none",
        strip.background = element_blank(),
        strip.text.x = element_text(color = "#4d4d4d", size = 16, 
                                    face = "bold"),
        axis.line = element_line(color = "#4d4d4d", size = 1),
        axis.text.x = element_text(color = "#4d4d4d", size = 12),
        axis.text.y = element_text(color = "#4d4d4d", size = 12),
        axis.title.x = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.title.y = element_text(color = "#4d4d4d", size = 14, 
                                    face = "bold"),
        axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
        axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
        labs(x = "Analyst", y = "Total Procrustes Distance\n from Consensus")